Chapter 5 Community composition

# Restore the saved workspace objects from data/data.Rdata into the session.
# NOTE(review): the objects used below (genome_counts_filt, genome_metadata,
# sample_metadata, phylum_colors) are presumably provided by this file -- confirm.
load("data/data.Rdata")

5.1 Taxonomy overview

5.1.1 Stacked barplot

# Stacked barplot of phylum-level relative abundances, one bar per sample.
# Each sample column is divided by its own total (TSS normalisation), the table
# is reshaped to long format, annotated with genome and sample metadata, and
# zero counts are dropped before plotting.
genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% # reduce to minimum number of columns
  left_join(genome_metadata, by = join_by(genome == genome)) %>% # append genome metadata
  left_join(sample_metadata, by = join_by(sample == sample)) %>% # append sample metadata
  filter(count > 0) %>% # filter 0 counts
  # grouping by phylum keeps the same sorting of taxonomic units in every bar
  ggplot(aes(x = sample, y = count, fill = phylum, group = phylum)) +
    geom_bar(stat = "identity", colour = "white", linewidth = 0.1) + # stacked bars with white borders
    scale_fill_manual(values = phylum_colors) +
    # facet per individual and time point, with shortened time point labels
    facet_nested(
      . ~ individual + time_point,
      scales = "free",
      labeller = labeller(
        time_point = c(
          "Pre_antibiotics" = "Pre_ant",
          "Post_antibiotics_1" = "Post_ant_1",
          "Post_antibiotics_2" = "Post_ant_2"
        )
      )
    ) +
    guides(fill = guide_legend(ncol = 1)) +
    theme(
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
      axis.title.x = element_blank(),
      panel.background = element_blank(),
      panel.border = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")
    ) +
    labs(fill = "Phylum", y = "Relative abundance", x = "Samples")

5.1.2 Phylum relative abundances

# Relative abundance of each phylum in each sample: one row per
# sample x phylum, with `relabun` holding the summed TSS-normalised counts of
# all genomes assigned to that phylum.
phylum_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  left_join(genome_metadata, by = join_by(genome == genome)) %>%
  group_by(sample, phylum) %>%
  # drop the grouping so the result is a plain tibble and no leftover
  # per-sample grouping leaks into later steps
  summarise(relabun = sum(count), .groups = "drop")

# Mean and standard deviation of the relative abundance of each phylum across
# samples, sorted from the most to the least abundant and rendered as a table.
phylum_summary %>%
  group_by(phylum) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean)) %>%
  tt()
tinytable_rwk25dcn1uus5zoh2i2s
phylum mean sd
p__Bacteroidota 0.524440141 0.127316510
p__Bacillota_A 0.218129853 0.075749924
p__Pseudomonadota 0.100829714 0.168802352
p__Bacillota 0.065661524 0.051944414
p__Verrucomicrobiota 0.047313902 0.048648719
p__Cyanobacteriota 0.015633063 0.021233104
p__Desulfobacterota 0.013819770 0.006889224
p__Bacillota_C 0.012453388 0.013842018
p__Bacillota_B 0.001718646 0.002287117
# Character vector of phyla ordered by decreasing mean relative abundance;
# used below to order the y axis and the colour palette of the phylum plot.
phylum_arrange <- phylum_summary %>%
  group_by(phylum) %>%
  summarise(mean = mean(relabun)) %>%
  arrange(desc(mean)) %>%
  pull(phylum)

# Jitter plot of per-sample relative abundances, one row per phylum.
# Factor levels are reversed so that the most abundant phylum is drawn at the
# top of the y axis; the palette is reordered by name to match.
phylum_summary %>%
  filter(phylum %in% phylum_arrange) %>%
  mutate(phylum = factor(phylum, levels = rev(phylum_arrange))) %>%
  ggplot(aes(x = relabun, y = phylum, group = phylum, color = phylum)) +
    scale_color_manual(values = phylum_colors[rev(phylum_arrange)]) +
    geom_jitter(alpha = 0.5) +
    theme_minimal() +
    theme(legend.position = "none") + # colour duplicates the y axis, so hide the legend
    labs(y = "Phylum", x = "Relative abundance")

5.2 Taxonomy boxplot

5.2.1 Family

# Relative abundance of each family in each sample: one row per
# sample x family, with `relabun` holding the summed TSS-normalised counts of
# all genomes assigned to that family.
family_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% # reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(sample == sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome == genome)) %>% # append genome metadata
  group_by(sample, family) %>%
  # drop the grouping so the result is a plain tibble and no leftover
  # per-sample grouping leaks into later steps
  summarise(relabun = sum(count), .groups = "drop")

# Mean and standard deviation of the relative abundance of each family across
# samples, sorted from the most to the least abundant and rendered as a table.
family_summary %>%
  group_by(family) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean)) %>%
  tt()
tinytable_w710mss7ub2qab6n31r7
family mean sd
f__Bacteroidaceae 0.2111761804 0.139716297
f__Rikenellaceae 0.1262345009 0.041183911
f__Lachnospiraceae 0.1221559156 0.056348298
f__Tannerellaceae 0.0597688994 0.038763633
f__ 0.0558781360 0.053517532
f__Erysipelotrichaceae 0.0503603704 0.049830980
f__Marinifilaceae 0.0499502203 0.026876894
f__UBA932 0.0335510817 0.041786274
f__Ruminococcaceae 0.0326684282 0.026453449
f__Akkermansiaceae 0.0298155750 0.049482943
f__Oscillospiraceae 0.0271637611 0.018607679
f__Burkholderiaceae_A 0.0269794972 0.057963873
f__Gastranaerophilaceae 0.0156330633 0.021233104
f__Burkholderiaceae_B 0.0142393093 0.042253051
f__Desulfovibrionaceae 0.0138197700 0.006889224
f__Alteromonadaceae 0.0126841890 0.037279818
f__Enterobacteriaceae 0.0110005401 0.018192813
f__Butyricicoccaceae 0.0109420628 0.006933355
f__CAG-239 0.0106659324 0.014861691
f__CALVMC01 0.0098587671 0.017356533
f__Moraxellaceae 0.0087097650 0.019763859
f__Burkholderiaceae 0.0081400791 0.026997588
f__JADKCL01 0.0076424144 0.025347021
f__UBA660 0.0069343163 0.015713124
f__Acutalibacteraceae 0.0056333801 0.007365987
f__Muribaculaceae 0.0046063404 0.003466581
f__Coprobacillaceae 0.0044443621 0.004445464
f__UBA3830 0.0042073343 0.010172731
f__Mycoplasmoidaceae 0.0039224751 0.006889425
f__Flavobacteriaceae 0.0036983513 0.012266044
f__UBA3700 0.0036927654 0.012247517
f__Aeromonadaceae 0.0032090159 0.007940373
f__CHK158-818 0.0025811994 0.002580585
f__Anaerotignaceae 0.0023321053 0.002969332
f__Peptococcaceae 0.0017186456 0.002287117
f__Anaerovoracaceae 0.0015862187 0.004037860
f__UBA1829 0.0014009798 0.002725359
f__Pseudomonadaceae 0.0009940515 0.003108676
# Character vector of families ordered by decreasing mean relative abundance.
# The ranking statistic is the mean (as its column name says), matching the
# ordering of the family summary table and of phylum_arrange.
family_arrange <- family_summary %>%
  group_by(family) %>%
  summarise(mean = mean(relabun, na.rm = TRUE)) %>%
  arrange(desc(mean)) %>%
  pull(family)

# Per time_point: per-sample relative abundance of the 20 top-ranked families
# (order taken from family_arrange), coloured by phylum and faceted by time point.
# NOTE(review): the family -> phylum lookup assumes every family label belongs
# to a single phylum; the unclassified label "f__" may occur under several
# phyla and would then duplicate rows in this join -- confirm.
family_summary %>%
  left_join(
    genome_metadata %>% select(family, phylum) %>% unique(),
    by = join_by(family == family)
  ) %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  # head() instead of [1:20] so that fewer than 20 families does not yield NAs
  filter(family %in% head(family_arrange, 20)) %>%
  mutate(family = factor(family, levels = rev(head(family_arrange, 20)))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x = relabun, y = family, group = family, color = phylum)) +
    # phylum_colors is indexed by phylum name elsewhere in this chapter, so the
    # full named palette is passed and colours are matched by name rather than
    # by dropping a hard-coded position.
    scale_color_manual(values = phylum_colors) +
    geom_jitter(alpha = 0.5) +
    facet_grid(. ~ time_point) +
    theme_minimal() +
    labs(y = "Family", x = "Relative abundance", color = "Phylum")

5.2.2 Genus

# Relative abundance of each genus in each sample: one row per
# sample x phylum x genus, with `relabun` holding the summed TSS-normalised
# counts. Genomes without a genus annotation ("g__") are removed and the
# "g__" prefix is stripped from the remaining genus names.
genus_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% # reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(sample == sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome == genome)) %>% # append genome metadata
  group_by(sample, phylum, genus) %>%
  # drop the grouping so the result is a plain tibble and no leftover
  # sample/phylum grouping leaks into later steps
  summarise(relabun = sum(count), .groups = "drop") %>%
  filter(genus != "g__") %>%
  mutate(genus = sub("^g__", "", genus))

# Mean and standard deviation of the relative abundance of each genus across
# the rows of genus_summary, sorted from the most to the least abundant.
# Kept as an object because its genus order is reused for the genus plot.
genus_summary_sort <- genus_summary %>%
  group_by(genus) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean))

# Render the sorted genus summary as a table
genus_summary_sort %>%
  tt()
tinytable_zpygv8ys6k7ie25s29fr
genus mean sd
Bacteroides 0.1775249332 0.109314357
Parabacteroides 0.0554095726 0.035124121
Mucinivorans 0.0552600046 0.049550217
Odoribacter 0.0401415283 0.024399876
Alistipes 0.0397499792 0.028185247
Clostridium_Q 0.0363715276 0.020090245
Egerieousia 0.0335510817 0.041786274
SZUA-378 0.0290183038 0.040693410
JAEZVV01 0.0269794972 0.057963873
Akkermansia 0.0223731622 0.046989793
Bacteroides_G 0.0189263693 0.040746534
Paucibacter_A 0.0142393093 0.042253051
Amedibacillus 0.0141985824 0.029133436
Dielma 0.0141336576 0.016330606
Intestinimonas 0.0133394420 0.010705642
Pararheinheimera 0.0126841890 0.037279818
Pseudoflavonifractor 0.0125791301 0.011106296
Citrobacter 0.0110005401 0.018192813
Hydrogenoanaerobacterium 0.0094715505 0.010554697
14-2 0.0092323435 0.007386213
Anaerotruncus 0.0089714879 0.009209991
Acinetobacter 0.0087097650 0.019763859
Avirikenella 0.0084205153 0.010938908
Pseudoduganella 0.0081400791 0.026997588
Bilophila 0.0071557312 0.004629539
Spyradomonas 0.0070402620 0.008193530
Hungatella_A 0.0066989150 0.009041871
Tidjanibacter 0.0060839003 0.005550803
CAJGBR01 0.0056953492 0.003603446
JADFUS01 0.0056704093 0.005147440
DUWA01 0.0056452122 0.015640734
Mobilisporobacter 0.0054593818 0.007616962
JAIHAL01 0.0047986372 0.007841867
HGM05232 0.0046063404 0.003466581
Coprobacillus 0.0044443621 0.004445464
Parabacteroides_B 0.0043593269 0.004775063
CAZU01 0.0041733530 0.011570149
Mycoplasma_L 0.0039224751 0.006889425
Flavobacterium 0.0036983513 0.012266044
UMGS1251 0.0033243891 0.006216785
Negativibacillus 0.0032342216 0.004905132
Aeromonas 0.0032090159 0.007940373
Scatacola_A 0.0030305489 0.007129984
OM05-12 0.0028388258 0.003946216
Gallibacteroides 0.0025811994 0.002580585
JAAYQI01 0.0023321053 0.002969332
Hespellia 0.0021290168 0.004757466
Massiliimalia 0.0019923222 0.004302461
Intestinibacillus 0.0018143641 0.003265593
Emergencia 0.0015862187 0.004037860
UBA1829 0.0014009798 0.002725359
MGBC107952 0.0012891041 0.004275474
Evtepia 0.0012451890 0.002490315
Pseudomonas_E 0.0009940515 0.003108676
# Character vector of genera ordered by decreasing mean relative abundance.
# genus_summary already excludes unannotated genera ("g__") and carries genus
# names without the "g__" prefix, so no further filtering or renaming is done.
genus_arrange <- genus_summary %>%
  group_by(genus) %>%
  summarise(mean = mean(relabun, na.rm = TRUE)) %>%
  arrange(desc(mean)) %>%
  pull(genus)

# Per time_point: per-sample relative abundance of every genus, coloured by
# phylum and faceted by time point. Genera are ordered on the y axis by
# decreasing mean relative abundance (most abundant at the top).
genus_summary %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  mutate(genus = factor(genus, levels = rev(genus_summary_sort %>% pull(genus)))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x = relabun, y = genus, group = genus, color = phylum)) +
    scale_color_manual(values = phylum_colors) +
    geom_jitter(alpha = 0.5) +
    facet_grid(. ~ time_point) +
    theme_minimal() +
    # the y axis shows genera, so it is labelled "Genus"
    labs(y = "Genus", x = "Relative abundance", color = "Phylum")